import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from sun.items import SunItem


class SunSpiderSpider(CrawlSpider):
    """Crawl the wz.sun0769.com "politicsNewest" listing and scrape each post.

    Listing pages are followed for pagination only; every detail page linked
    from them is handed to :meth:`parse_info`, which yields one ``SunItem``.
    """

    name = 'sun_spider'
    allowed_domains = ['wz.sun0769.com']
    start_urls = ['https://wz.sun0769.com/political/index/politicsNewest?id=1&page=1']

    rules = (
        # Pagination links: keep following them, nothing to scrape here.
        Rule(LinkExtractor(allow=r'.*/political/index/politicsNewest\?id=1&page=\d'), follow=True),
        # Detail pages: scrape them, but do not crawl further from them.
        Rule(LinkExtractor(allow=r'.*/political/politics/index\?id=\d+'), follow=False, callback='parse_info'),
    )

    @staticmethod
    def _text_at(texts, index):
        """Return ``texts[index]`` stripped, or ``None`` if the index is absent."""
        if index < len(texts):
            return texts[index].strip()
        return None

    def parse_info(self, response):
        """Extract one post from a detail page.

        Args:
            response: The Scrapy response for a ``/political/politics/index``
                detail page.

        Yields:
            SunItem: with ``title``, ``user``, ``data_time``, ``zt`` (status)
            and ``content``. A field whose source node is missing from the
            page is ``None`` (``content`` is an empty string) instead of
            aborting the whole callback with ``IndexError``.
        """
        self.logger.debug('Parsing detail page %s', response.url)

        title = response.xpath('//p[@class="focus-details"]/text()').get()

        # Evaluate the metadata XPath once; the fields are picked out of the
        # resulting text nodes by position (1 = user, 2 = date/time, 3 = status).
        info = response.xpath('//div[contains(@class, "focus-date")]')
        meta_texts = info.xpath('./span[contains(@class, "fl")]//text()').getall()
        if len(meta_texts) < 4:
            self.logger.warning(
                'Unexpected layout on %s: only %d metadata text node(s) found',
                response.url,
                len(meta_texts),
            )

        user = self._text_at(meta_texts, 1)
        data_time = self._text_at(meta_texts, 2)
        zhuangtai = self._text_at(meta_texts, 3)

        # Drop the leading "状态：" label so only the status value remains.
        zt = None
        if zhuangtai is not None:
            zt = zhuangtai.replace('状态：\n', '').strip()

        content = ''.join(response.xpath('//div[@class="details-box"]//text()').getall()).strip()

        yield SunItem(
            title=title,
            user=user,
            data_time=data_time,
            zt=zt,
            content=content,
        )